library(tidyverse)
library(haven)
library(stargazer)
library(matrixStats) # weightedMedian
library(reldist)


news <- read_csv("../results/merged_news.csv") %>% 
  filter(party %in% c("dem", "rep"))

subjects <- news %>% select(caseid, party, weight) %>% distinct()
weights <- subjects %>% select(caseid, weight)

all_stats <- read_csv("../data/data_for_usage_tables.csv")
all_stats <- all_stats %>% left_join(subjects, by = "caseid")
all_stats <- all_stats %>% filter(party %in% c("dem", "rep"))

flat_total_fracs <- all_stats %>% select(-weight, -party, -date) %>%
  group_by(caseid) %>%
  summarise_all(sum) %>% 
  mutate(
    facebook = facebook_total / all_total,
    google = google_total / all_total,
    youtube = youtube_total / all_total,
    news = news_total / all_total,
    polnews = polnews_total / all_total,
    portal = portal_total / all_total,
    nonportal = nonportal_total / all_total) %>% 
  left_join(weights, by = "caseid") %>% 
  summarise_at(vars(-weight), ~ weighted.mean(., w = weight)) %>% 
  select(polnews, facebook, google, youtube, news, portal, nonportal)

flat_dur_fracs <- all_stats %>% select(-weight, -party, -date) %>%
  group_by(caseid) %>%
  summarise_all(sum) %>% 
  mutate(
    facebook = facebook_dur / all_dur,
    google = google_dur / all_dur,
    youtube = youtube_dur / all_dur,
    news = news_dur / all_dur,
    polnews = polnews_dur / all_dur,
    portal = portal_dur / all_dur,
    nonportal = nonportal_dur / all_dur) %>% 
  left_join(weights, by = "caseid") %>% 
  summarise_at(vars(-weight), ~ weighted.mean(., w = weight)) %>% 
  select(polnews, facebook, google, youtube, news, portal, nonportal)


frac_tab <- bind_rows(flat_total_fracs, flat_dur_fracs)
frac_tab <- t(frac_tab)
colnames(frac_tab) <- c("Fraction of URLs", "Fraction of Time")
rownames(frac_tab)[1] <- "political news"
stargazer(frac_tab,
  digits = 2,
  summary = FALSE,
  style = "ajps", type = "latex",
  out = "../results/tables/dist_all_browsing.tex",
  table.placement = "H",
  label = "t:dist_all_browsing",
  title = "Average Fraction of Attention to Different Sections of the Internet (Weighted)")




num_days <- all_stats %>% pull(date) %>% unique() %>% length()

# subset to people who were online within three days of the last day
attr_robust <- all_stats %>% group_by(caseid) %>%
  summarise(last_date = max(date)) %>% 
  left_join(all_stats, ., by = "caseid") %>% 
  filter(last_date >= max(last_date) - 3)


attr_fracs <- attr_robust %>% group_by(caseid) %>%
  summarise(
    all_days = n(),
    facebook_days = sum(facebook_total > 0),
    google_days = sum(google_total > 0),
    youtube_days = sum(youtube_total > 0),
    news_days = sum(news_total > 0),
    polnews_days = sum(polnews_total > 0),
    portal_days = sum(portal_total > 0),
    nonportal_days = sum(nonportal_total > 0)) %>% 
  mutate(
    facebook = facebook_days / num_days,
    google = google_days / num_days,
    youtube = youtube_days / num_days,
    news = news_days / num_days,
    polnews = polnews_days / num_days,
    portal = portal_days / num_days,
    nonportal = nonportal_days / num_days) %>% 
  select(caseid, polnews, facebook, google, youtube,
    news, portal, nonportal) %>% 
  left_join(weights, by = "caseid") %>% 
  select(-caseid)

frac_days_tab <- bind_rows(
  attr_fracs %>% summarise_all(~ weighted.mean(., weight)),
  attr_fracs %>% summarise_all(~ wtd.quantile(., q = 0.5, weight = weight)),
  attr_fracs %>% summarise_all(~ wtd.quantile(., q = 0.25, weight = weight)),
  attr_fracs %>% summarise_all(~ wtd.quantile(., q = 0.75, weight = weight))
  ) %>% select(-weight) %>% t()
colnames(frac_days_tab) <- c("Mean", "Median", "25th Percentile", "75th Percentile")
rownames(frac_days_tab)[1] <- "political news"
stargazer(frac_days_tab,
  digits = 2,
  summary = FALSE,
  style = "ajps", type = "latex",
  label = "t:frac_days_on",
  out = "../results/tables/frac_days_on.tex",
  table.placement = "H",
  title = "Fraction of Days Visiting Different Sections of the Internet (Weighted)")


attr_avgs <- attr_robust %>% group_by(caseid) %>%
  summarise(
    facebook = sum(facebook_total) / num_days,
    google = sum(google_total) / num_days,
    youtube = sum(youtube_total) / num_days,
    news = sum(news_total) / num_days,
    polnews = sum(polnews_total) / num_days,
    dpolnews = sum(dpolnews_total) / num_days,
    portal = sum(portal_total) / num_days,
    nonportal = sum(nonportal_total) / num_days) %>% 
  left_join(weights, by = "caseid") %>% 
  select(-caseid)


avg_tab <- bind_rows(
  attr_avgs %>% summarise_all(~ weighted.mean(., weight)),
  attr_avgs %>% summarise_all(~ wtd.quantile(., q = 0.5, weight = weight)),
  attr_avgs %>% summarise_all(~ wtd.quantile(., q = 0.25, weight = weight)),
  attr_avgs %>% summarise_all(~ wtd.quantile(., q = 0.75, weight = weight))
  ) %>% select(-weight)
avg_tab <- avg_tab[, 6]  %>% t()
rownames(avg_tab) <- c("Distinct Political News URLs")
colnames(avg_tab) <- c("Mean", "Median", "25th Percentile", "75th Percentile")
stargazer(avg_tab,
  digits = 2,
  label = "t:avg_distinct_polnews",
  summary = FALSE,
  style = "ajps", type = "latex",
  out = "../results/tables/avg_distinct_polnews.tex",
  table.placement = "H",
  title = "Number of Distinct Political News URLs Visited Per Day (Weighted)")